## -- Attaching packages --------------------------------------- tidyverse 1.3.0 --
## v ggplot2 3.3.2 v purrr 0.3.4
## v tibble 3.0.4 v dplyr 1.0.2
## v tidyr 1.1.2 v stringr 1.4.0
## v readr 1.4.0 v forcats 0.5.0
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
##
## Attaching package: 'janitor'
## The following objects are masked from 'package:stats':
##
## chisq.test, fisher.test
In this notebook I am conducting an exploratory data analysis focused on the green space deprivation dataset released by Friends of the Earth (referred to as the FoE Green Space dataset). This is a dataset that accompanied the Friends of the Earth “England’s Green Space Gap Report,” published autumn 2020.
My objectives within this notebook are to:
Understand the scope and nature of the FoE Green Space dataset;
Generate and refine interesting questions that can be addressed using the FoE Green Space dataset (and other complementary datasets);
Develop ideas for analysis projects, which could the FoE Green Space dataset and generate outputs which could be used by policy-makers and local authorities with interests in green space and it’s accessibility.
The primary audience for the notebook is myself. As the understanding developed within it will underpin some future data analysis projects.
The notebook might be of interest to companies and individuals recruiting data scientists, and will form part of my portfolio of work.
All data files are in the ‘Data’ folder (i.e. /foe_green_space/Data).
| File Name | Variable name in this notebook | Notes | More details |
|---|---|---|---|
(FOE) Green Space Consolidated Data - England - Version 2.1.xlsx |
green_space | Friends of the Earth | |
Local_Authority_District_to_Region __December_2019__Lookup_in_England.csv |
LAD_to_region | Using the December 2019 version | ons |
Rural_Urban_Classification_(2001)_for_MSOAs _in_England_and_Wales/RUC_MSOA_2001_EW_LU.csv |
urban_rural_ classification |
Using the 2001 version of the data (the most recently available) | ons |
National:
Where is green space deprivation experienced?
Green space deprivation:
What is the scale of the green space deprivation problem in England?
Is green space deprivation an urban problem? 2001 classification?
How do the two dimensions of green space deprivation (availability of garden space and accessibility of public green space) relate to each other?
How is green space deprivation distributed across regions and a local authorities in England?
Who is living with green space deprivation?
Ethnicity and green space deprivation:
Age and green space deprivation:
Wealth and green space deprivation:
The relationship between green space deprivation and health?
Covid and green space deprivation:
Life expectancy and green space deprivation:
Ideas:
green_space <- read_excel("../Data/(FOE) Green Space Consolidated Data - England - Version 2.1.xlsx",
sheet = "MSOAs V2.1") %>%
# inconsistent naming conventions for variables are used in the source data
# some need to clean names for consitency
clean_names()
## New names:
## * `` -> ...1
green_space %>%
select(la_code)
# FOE green space data doesn't include a variable for the region each MSOA/local authority lies within
# So, import a look up table from the ONS, which maps between Local Authority Districts and Regions
LAD_to_region <- read_csv("../Data/Local_Authority_District_to_Region__December_2019__Lookup_in_England.csv") %>%
select(-FID, -LAD19NM)
##
## -- Column specification --------------------------------------------------------
## cols(
## FID = col_double(),
## LAD19CD = col_character(),
## LAD19NM = col_character(),
## RGN19CD = col_character(),
## RGN19NM = col_character()
## )
LAD_to_region
# add region names and codes (from ONS look up) to FOE green space data
green_space <- green_space %>%
left_join(LAD_to_region, by = c("la_code" = "LAD19CD")) %>%
relocate(RGN19CD, RGN19NM, .before = area) %>%
rename(region_code = RGN19CD,
region_name = RGN19NM)
green_space
green_space %>%
tabyl(green_space_deprivation_rating)
https://themockup.blog/posts/2020-09-04-10-table-rules-in-r/ http://applied-r.com/r-color-tables/
to add?
colour_gs_row <- function(df, colour, rating){
df %>%
tab_style(style = list(cell_fill(color = colour_scale[[colour]])),
locations = cells_body(rows = green_space_deprivation_rating == rating)
)
}
colour_gs_rows <- function(df){
df %>%
colour_gs_row(colour = 5, rating = "E") %>%
colour_gs_row(colour = 4, rating = "D") %>%
colour_gs_row(colour = 3, rating = "C") %>%
colour_gs_row(colour = 2, rating = "B") %>%
colour_gs_row(colour = 1, rating = "A")
}
# set green to grey colour scale for tables and graphs
colour_palette <- scales::seq_gradient_pal(low = "palegreen4", high = "grey")
colour_scale <- colour_palette(seq(0, 1, length.out = 5))
background_colour <- "#EDEDED"
# set labels for grouping the rows in the table below
d_e_group_label <- "\nUrgent action needed to improve access to green space"
a_b_c_group_label <- "\nAction needed to protect access to green space"
# for each green space deprivation rate calculate the numbers and proportions of msoas
msoa_by_rating <- green_space %>%
tabyl(green_space_deprivation_rating) %>%
mutate(percent = percent * 100) %>%
rename(n_msoa = n,
percent_msoa = percent)
# for each green space deprivation rate calculate the numbers and proportions of the population
pop_by_rating <- green_space %>%
group_by(green_space_deprivation_rating) %>%
summarise(population = sum(population_imd)) %>%
ungroup() %>%
mutate(percent_pop = population / sum(population) * 100)
## `summarise()` ungrouping output (override with `.groups` argument)
# merge the msoa and population tables
msoa_and_pop_by_rating <- msoa_by_rating %>%
left_join(pop_by_rating, by = c("green_space_deprivation_rating" = "green_space_deprivation_rating"))
msoa_and_pop_by_rating %>%
add_column(group = c(rep(a_b_c_group_label, 3), rep(d_e_group_label, 2))) %>%
arrange(desc(green_space_deprivation_rating)) %>%
gt(rowname_col = "green_space_deprivation_rating", groupname_col = "group") %>%
# add spanners
tab_spanner(label = "Neighbourhoods",
columns = 2:3) %>%
tab_spanner(label = "Population",
columns = 4:5) %>%
# format columns
fmt_number(starts_with("percent_"), decimals = 0) %>%
fmt_number(vars(population), scale_by = 1e-6) %>%
# create and format summary rows for each group
summary_rows(groups = TRUE,
columns = vars(population),
fns = list(Total = ~sum(.)),
formatter = fmt_number,
scale_by = 1e-6) %>%
summary_rows(groups = TRUE,
columns = vars(n_msoa, percent_msoa, percent_pop),
fns = list(Total = ~sum(.)),
formatter = fmt_number,
decimals = 0
) %>%
# colour rows using the function defined above
colour_gs_rows() %>%
# add table labels
tab_stubhead(label = html("Green Space<br>Deprivation<br>Rating")) %>%
cols_label(n_msoa = "Number",
percent_msoa = "%",
population = "Millions of people",
percent_pop = "%") %>%
tab_source_note(source_note = md("**Source:** Friends of the Earth")) %>%
tab_header(title = md("**Green Space Deprivation in England**"),
subtitle = md("Understanding the scale of the problem")
) %>%
# adjust the appearance of table elements
tab_options(heading.align = "left",
heading.subtitle.font.size = 12,
table.border.top.color = "black",
column_labels.border.bottom.color = "black",
column_labels.border.bottom.width = 3,
column_labels.background.color = background_colour,
summary_row.background.color = background_colour,
row_group.background.color = "#FFFFFF",
stub.background.color = background_colour,
source_notes.font.size = 10) %>%
# adjust styling of table text
tab_style(style = list(cell_text(style = "italic")),
locations = list(cells_body(columns = c(3,5)),
cells_row_groups(),
cells_summary(columns = vars(percent_msoa, percent_pop)))
) %>%
tab_style(style = list(cell_text(weight = "bold")),
locations = list(cells_summary(),
cells_column_labels(everything())
)
) %>%
tab_style(style = list(cell_text(size = 12)),
locations = list(cells_row_groups(),
cells_column_labels(everything()))
) %>%
tab_style(style = list(cell_text(weight = "bold", size = 14)),
locations = list(cells_stubhead(),
cells_column_spanners(spanners = c("Neighbourhoods", "Population")),
cells_stub()
)
) %>%
tab_style(style = list(cell_text(align = "center")),
locations = cells_stub()
)
| Green Space Deprivation in England | ||||
|---|---|---|---|---|
| Understanding the scale of the problem | ||||
| Green Space Deprivation Rating |
Neighbourhoods | Population | ||
| Number | % | Millions of people | % | |
| Urgent action needed to improve access to green space | ||||
| E | 1108 | 16 | 9.62 | 18 |
| D | 955 | 14 | 8.21 | 15 |
| Total | 2,063 | 30 | 17.84 | 33 |
| Action needed to protect access to green space | ||||
| C | 1727 | 25 | 13.54 | 25 |
| B | 1360 | 20 | 10.77 | 20 |
| A | 1641 | 24 | 12.58 | 23 |
| Total | 4,728 | 70 | 36.89 | 67 |
| Source: Friends of the Earth | ||||
# As a starting point - select the three indices (GSDI_...) that make up the rating (and the data that these indices are calculated from)
gs_dep_by_rating <- green_space %>%
select(green_space_deprivation_rating,
gsdi_avg_area, green_space_area_per_capita,
gsdi_access, pcnt_pop_area_with_go_space_access,
gsdi_garden, garden_area_per_capita)
# trying to understand what a typical MSOA in each rating looks like ...
gs_dep_by_rating %>%
group_by(green_space_deprivation_rating) %>%
summarise(mean_gs_per_c = mean(green_space_area_per_capita, na.rm = TRUE),
median_gs_per_c = median(green_space_area_per_capita, na.rm = TRUE),
mean_perc_pop_with_gs_access = mean(pcnt_pop_area_with_go_space_access, na.rm = TRUE),
median_perc_pop_with_gs_access = median(pcnt_pop_area_with_go_space_access, na.rm = TRUE),
mean_gard_area_per_C = mean(garden_area_per_capita, na.rm = TRUE),
median_gard_area_per_C = median(garden_area_per_capita, na.rm = TRUE)
)
## `summarise()` ungrouping output (override with `.groups` argument)
dealing with outliers Boxplot shows outliers at 1.5*IQR + Q3 - they are part of the natural variability of the population, so it seems appropriate to retain the outliers, but zoom on the graphs because the .
box_plot_mapping = aes(x = green_space_area_per_capita,
y = green_space_deprivation_rating)
# may be plotting will help to understand what a typical MSOA in each rating looks like
p <- ggplot(data = gs_dep_by_rating,
mapping = box_plot_mapping
)
p + geom_boxplot(aes(group = green_space_deprivation_rating))
# the outliers (in terms of very high values of green space per capita) are making it difficult to get a handle on the distributions
# so identify the approximate value where outliers begin
p <- ggplot(data = gs_dep_by_rating,
mapping = aes(x = green_space_area_per_capita))
p + geom_boxplot()
# taking the dataset as whole outliers begin at approx. 62.5m2 gs per capita
# this seems like a low value to use as a cut off point for outliers
# so lets look at group A in detail
p <- ggplot(data = gs_dep_by_rating %>%
filter(green_space_deprivation_rating == "A"),
mapping = box_plot_mapping)
p + geom_boxplot() +
coord_cartesian(xlim = c(0, 1000))
# outliers from rating group A start at approximately 500m2 gs per capita
not sure on whether or not to filter out outliers
p <- ggplot(data = gs_dep_by_rating, #%>%
#filter(green_space_area_per_capita < 500),
mapping = box_plot_mapping
)
p + geom_jitter(colour = "grey80") +
geom_boxplot(mapping = aes(group = green_space_deprivation_rating,
fill = green_space_deprivation_rating
),
alpha = 0.5
) +
coord_cartesian(xlim = c(0, 500)) +
guides(fill = FALSE)
gs_pc_summary <- green_space %>%
group_by(green_space_deprivation_rating) %>%
summarise(
min = min(green_space_area_per_capita),
median = median(green_space_area_per_capita),
max = max(green_space_area_per_capita),
iqr = IQR(green_space_area_per_capita),
mean = mean(green_space_area_per_capita),
sd = sd(green_space_area_per_capita)
)
## `summarise()` ungrouping output (override with `.groups` argument)
gs_pc_summary
p <- ggplot(data = gs_dep_by_rating, #%>%
#filter(green_space_area_per_capita < 500),
mapping = aes(x = green_space_area_per_capita,
y = green_space_deprivation_rating,
fill = green_space_deprivation_rating)
)
p + geom_density_ridges(alpha = 0.5) + xlim(0,200) +
guides(fill = FALSE)
## Picking joint bandwidth of 3.84
## Warning: Removed 590 rows containing non-finite values (stat_density_ridges).
So, I wondered if the outliers/very long tail are a result of areas with small populations and/very large areas of green space.
p <- ggplot(data = green_space,
mapping = aes(x = population_imd,
colour = green_space_deprivation_rating)
)
p + geom_point(mapping = aes(y = green_space_area),
alpha = 0.5) +
labs(colour = "GS rating") +
facet_wrap(~green_space_deprivation_rating)
p + geom_point(mapping = aes(y = log(green_space_area)),
alpha = 0.5) +
labs(colour = "GS rating")
p <- ggplot(data = green_space,
mapping = aes(x = log(green_space_area),
)
)
p + geom_density()
## Warning: Removed 115 rows containing non-finite values (stat_density).
p <- ggplot(data = green_space,
mapping = aes(x = population_imd,
)
)
p + geom_density()
So, it looks like the it is the green space area has much more influence on green space area per capita, than population.
p <- ggplot(data = green_space,
mapping = aes(y = green_space_area_per_capita,
colour = green_space_deprivation_rating)
)
p + geom_point(mapping = aes(x = green_space_area),
alpha = 0.5) +
labs(colour = "GS rating") +
facet_wrap(~ green_space_deprivation_rating, scales = "free")
p <- p + geom_point(mapping = aes(x = population_imd),
alpha = 0.5) +
labs(colour = "GS rating")
p
p + ylim(0, 500) + facet_wrap(~ green_space_deprivation_rating, scales = "free_y")
## Warning: Removed 288 rows containing missing values (geom_point).
So, lets look at the distribution of green_space_area itself. This is relatively tricky given the wide range of values (as shown in the summary stats). I tried histograms and density plots too, but a box plot seemed the best way to understand the distribution. The first boxplot shows the full distribution and as a result is very difficult to interpret as the large outliers to the right of plot result in the box itself appearing as a single line and hence being very difficult to interpret. In the second plot hte x axis is cropped so it is straight forward to interpret the box component of the plot. However, this comes at the cost of failing to show the very large outliers within the distribution.
The extreme skew of the distribution can be seen in the summary statistics below. The median for green_space_area is 152,418 m2 while the maximum 636,087,671 m2.
gs_area_summary <- function(variable, xlim){
print(green_space %>% select({{variable}}) %>% summary())
p <- ggplot(data = green_space,
mapping = aes(x = {{variable}}))
plot(p + geom_boxplot())
p + geom_boxplot() +
coord_cartesian(xlim = xlim)
}
gs_area_summary(green_space_area, c(0, 1e6))
## green_space_area
## Min. : 0
## 1st Qu.: 64273
## Median : 152418
## Mean : 1725346
## 3rd Qu.: 371044
## Max. :636087671
# trying different lengths of the x axis to try to understand the distribution
# p <- p +
# geom_boxplot()
#
# c(10, 100, 1000, 10000, 100000, 1e6, 1e7, 1e8, 1e9) %>%
# map(~ p + coord_cartesian(xlim = c(0, .x)))
A similarly extreme right skewed distribution can seen for green_space_area_per_capita, as shown in the plots and summary stats below. It is worth noting just how atypical many of the larger outliers are. The median for green_space_area_per_capita is less than 20 m2 per capita, while the maximum is approximately 100,000 m2 per capita.
gs_area_summary(green_space_area_per_capita, c(0, 100))
## green_space_area_per_capita
## Min. : 0.00
## 1st Qu.: 8.19
## Median : 19.17
## Mean : 244.72
## 3rd Qu.: 48.03
## Max. :101465.57
So, I thought it was worth a quick look at the population density across English MSOAs. The first graph shows the kernel density function for the population density of English MSOAs. Key features of the distribution include:
The second plot groups MSOAs by their FoE green space deprivation rating and highlights:
green_space %>%
select(population_density) %>%
summary()
## population_density
## Min. :5.557e-06
## 1st Qu.:7.344e-04
## Median :2.623e-03
## Mean :3.458e-03
## 3rd Qu.:4.655e-03
## Max. :2.737e-02
p <- ggplot(data = green_space,
mapping = aes(x = population_density
)
)
p + geom_density()
p + geom_density(mapping =aes(fill = green_space_deprivation_rating),
alpha = 0.4)
Plotting population density against green space area and green space area per capita produces very associations. Note the log scales on both the x and y axis in both cases.
p <- ggplot(data = green_space,
mapping = aes(x = log(population_density),
colour = green_space_deprivation_rating
)
)
p + geom_point(mapping = aes(y = log(green_space_area)),
alpha = 0.25) +
labs(colour = "GS rating")
p + geom_point(mapping = aes(y = log(green_space_area_per_capita)),
alpha = 0.25) +
labs(colour = "GS rating")
From histogram:
pcnt_pop_with_go_space_access is uniform.pcnt_pop_with_go_space_access and frequency.from the boxplot with grouping by rating
Overall it is not clear to what extent pcnt_pop_with_go_space_access is influencing the ratings … would dropping it make much difference to how MSOA are classified?
75% pcnt_pop_with_go_space_access is used as a cut off point for some classifications. This figure seems high, it is at approximately the 95th percentile (see calculation below).
rating_component_plots <- function(){
# looking at the distribution of rating_component
plot(p + geom_density())
plot(p + geom_density(mapping = aes(fill = green_space_deprivation_rating),
alpha = 0.4))
plot(p + geom_boxplot())
plot(p + geom_histogram())
# break down prating_component distribution by green space deprivation rating
p + geom_jitter(mapping = aes(y = green_space_deprivation_rating),
alpha = 0.2,
colour = "grey50") +
geom_boxplot(mapping = aes(y = green_space_deprivation_rating,
fill = green_space_deprivation_rating), alpha = 0.5) +
guides(fill = FALSE)
}
p <- ggplot(data = green_space,
mapping = aes(x = pcnt_pop_with_go_space_access)
)
rating_component_plots()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# looking at assocation between pcnt_pop_with_go_space_access and the other
# two variables that inform the green space deprivation rating
p + geom_point(mapping = aes(y = log(green_space_area_per_capita),
colour = green_space_deprivation_rating)
)
p + geom_point(mapping = aes(y = log(garden_area_per_capita),
colour = green_space_deprivation_rating)
)
# calculate the percentile of the cutoff value used in FoE classification
percentile <- ecdf(green_space$pcnt_pop_with_go_space_access)
percentile(75)
## [1] 0.9471359
green_space
grouped_summary <- function(df, grouping_var, summary_var){
df %>%
group_by({{grouping_var}}) %>%
summarise(min = min({{summary_var}}),
median = median({{summary_var}}),
max = max({{summary_var}}),
iqr = IQR({{summary_var}}),
mean = mean({{summary_var}}),
sd = sd({{summary_var}})
)
}
grouped_summary(df = green_space,
grouping_var = green_space_deprivation_rating,
summary_var = pcnt_pop_with_go_space_access) %>%
gt()
## `summarise()` ungrouping output (override with `.groups` argument)
| green_space_deprivation_rating | min | median | max | iqr | mean | sd |
|---|---|---|---|---|---|---|
| A | 0.6317776 | 20.96466 | 96.57368 | 29.79130 | 26.36973 | 19.96222 |
| B | 25.0390119 | 45.67634 | 100.00000 | 26.67371 | 49.46104 | 18.23865 |
| C | 0.0000000 | 21.92089 | 90.81132 | 33.62711 | 28.72732 | 22.05134 |
| D | 2.0380275 | 45.00738 | 99.50842 | 29.69103 | 46.82851 | 19.52741 |
| E | 0.0000000 | 24.18513 | 100.00000 | 30.29954 | 28.92837 | 21.22944 |
# produce plots for garden_area_per_capita
p <- ggplot(data = green_space %>%
filter(garden_area_per_capita < 4000),# remove one outlier around
# 5000m2 to make plots readable
mapping = aes(x = garden_area_per_capita)
)
rating_component_plots()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
#
# #
# p <- ggplot(data = green_space,
# mapping = aes(x = log(garden_area_per_capita))
# )
#
# rating_component_plots()
grouped_summary(df = green_space,
grouping_var = green_space_deprivation_rating,
summary_var = garden_area_per_capita) %>%
gt()
## `summarise()` ungrouping output (override with `.groups` argument)
| green_space_deprivation_rating | min | median | max | iqr | mean | sd |
|---|---|---|---|---|---|---|
| A | 85.9232540 | 195.80436 | 822.19669 | 171.74319 | 224.52812 | 119.28676 |
| B | 3.0706027 | 91.22088 | 5647.41855 | 43.62001 | 96.63223 | 156.75402 |
| C | 56.9000926 | 93.86037 | 598.47099 | 67.71153 | 121.12249 | 72.93403 |
| D | 1.8816985 | 49.97942 | 85.89009 | 30.84960 | 49.79282 | 20.31293 |
| E | 0.2565869 | 33.54458 | 85.33891 | 36.07619 | 36.97237 | 22.67188 |
Rural-urban classification at LA scale
https://www.gov.uk/government/statistics/local-authority-rural-urban-classification Rural-Urban Classification of Local Authorities Post-2009 Boundaries
readxl::read_xls("../Data/la-class-dataset-post0409.xls", sheet = "England")
Rural-urban classication at MSOA scale
file_loc <- "../Data/Rural_Urban_Classification_(2001)_for_MSOAs_in_England_and_Wales/RUC_MSOA_2001_EW_LU.csv"
urban_rural_classification <- readr::read_csv(file_loc) %>%
clean_names() %>%
select(msoa01cd, ruc01nm, ruc01cd, morphology_name, morphology_code, context_name, context_code)
##
## -- Column specification --------------------------------------------------------
## cols(
## GOR01CD = col_character(),
## GOR01NM = col_character(),
## CTY01CD = col_logical(),
## CTY01NM = col_logical(),
## LAD01CD = col_character(),
## LAD01NM = col_character(),
## MSOA01CD = col_character(),
## MSOA01NM = col_character(),
## RUC01NM = col_character(),
## RUC01CD = col_double(),
## `Morphology Name` = col_character(),
## `Morphology Code` = col_double(),
## `Context Name` = col_character(),
## `Context Code` = col_double()
## )
## Warning: 6366 parsing failures.
## row col expected actual file
## 3599 CTY01CD 1/0/T/F/TRUE/FALSE 09 '../Data/Rural_Urban_Classification_(2001)_for_MSOAs_in_England_and_Wales/RUC_MSOA_2001_EW_LU.csv'
## 3599 CTY01NM 1/0/T/F/TRUE/FALSE Bedfordshire '../Data/Rural_Urban_Classification_(2001)_for_MSOAs_in_England_and_Wales/RUC_MSOA_2001_EW_LU.csv'
## 3600 CTY01CD 1/0/T/F/TRUE/FALSE 09 '../Data/Rural_Urban_Classification_(2001)_for_MSOAs_in_England_and_Wales/RUC_MSOA_2001_EW_LU.csv'
## 3600 CTY01NM 1/0/T/F/TRUE/FALSE Bedfordshire '../Data/Rural_Urban_Classification_(2001)_for_MSOAs_in_England_and_Wales/RUC_MSOA_2001_EW_LU.csv'
## 3601 CTY01CD 1/0/T/F/TRUE/FALSE 09 '../Data/Rural_Urban_Classification_(2001)_for_MSOAs_in_England_and_Wales/RUC_MSOA_2001_EW_LU.csv'
## .... ....... .................. ............ ..................................................................................................
## See problems(...) for more details.
urban_rural_classification
# check if any of the parsing problems relate to the columns of interest
columns_of_interest <- c("MSOA01CD", "RUC01NM", "RUC01CD", "Morphology Name",
"Morphology Code", "Context Name", "Context Code")
problems(urban_rural_classification) %>%
filter(col %in% columns_of_interest)
green_space_urban_rural <- green_space %>%
left_join(urban_rural_classification, by = c("msoa_code" = "msoa01cd"))
green_space_urban_rural
# p <- ggplot(data = green_space_urban_rural,
# mapping = aes(y = green_space_deprivation_rating))
#
# p + geom_tile(mapping = aes(x = ruc01cd))
green_space_urban_rural %>%
tabyl(green_space_deprivation_rating, morphology_name) %>%
adorn_totals("row") %>%
adorn_percentages("col") %>%
adorn_pct_formatting(rounding = "half up", digits = 0) %>%
adorn_ns() %>%
gt()
| green_space_deprivation_rating | Town and Fringe | Urban > 10K | Village Hamlet & Isolated Dwellings | NA_ |
|---|---|---|---|---|
| A | 51% (309) | 14% (747) | 84% (566) | 13% (19) |
| B | 7% (41) | 24% (1287) | 0% (0) | 21% (32) |
| C | 41% (248) | 25% (1345) | 16% (110) | 16% (24) |
| D | 1% (6) | 17% (923) | 0% (0) | 17% (26) |
| E | 0% (2) | 20% (1056) | 0% (0) | 33% (50) |
| Total | 100% (606) | 100% (5358) | 100% (676) | 100% (151) |
# gt() %>%
# grand_summary_rows(columns = vars(`Town and Fringe`, `Urban > 10K`,
# `Village Hamlet & Isolated Dwellings`, NA_),
# fns = c(total = ~sum(.))
# )
Some thoughts on where I am in understanding the FoE ratings and green_space_area:
green_space_area and green_space_area_per_capita it doesn’t make sense to me to talk solely about green space deprivation. There are clearly places that are green space affluent … For example, the typical (median) amount of green space area per capita for an MSOA is 20 m2. Whilst, the MSOA with the most green space per capita has approximately 5000 times more green space per capita than the typical MSOA. But I guess FoE are following the terminology/approach of the Indices of Multiple Deprivation dataset provided by the ONS.green_space_area_per_capita for MSOAs in each rating (as shown in the table below) raises a question in my mind as the amounts of green_space_area_per_capita are relatively similar across ratings E to B … Is it a different experience to live in a neighborhood with approximately 3m2 green_space_area_per_capita (as is typical for a E rated MSOA) or with approximately 16m2 green_space_area_per_capita (as is typical for a E rated MSOA)? Which got me wondering how much green space is enough (Russo and Cirella 2018) - it appear that this is a question that hasn’t been the subject of too much research to date …gs_pc_summary
gs_pc_summary %>%
select(green_space_deprivation_rating, median, iqr)
green_space %>%
select(garden_area_per_capita) %>%
summary()
## garden_area_per_capita
## Min. : 0.257
## 1st Qu.: 56.878
## Median : 85.890
## Mean : 117.445
## 3rd Qu.: 138.730
## Max. :5647.419
green_space
p <- ggplot(data = green_space,
mapping = aes(x = green_space_area_per_capita,
y = garden_area_per_capita)
)
p + geom_point()
p <- ggplot(data = green_space_urban_rural)
p + geom_density(mapping = aes(x = garden_area_per_capita))
p + geom_density(mapping = aes(x = log(green_space_area_per_capita + 1)))
p + geom_density(mapping = aes(x = log(garden_area_per_capita + 1)))
p + geom_density(mapping = aes(x = pcnt_pop_area_with_go_space_access))
I think it might be actively helpful to remove the right skew from the two green space variables in the context of looking at green space deprivation. Why?
The the data from all three variables definitely needs transforming to lie on a scale of 0 to 1, to ensure that the kmeans algorithm applies a roughly equal weight to each variable. When putting the raw data into kmeans, green_space_area_per_capita is the predominant factor in determining clusters due to the fact that it’s values are much larger than those of the other variables …
My initial efforts in transforming the data - a log transformation and then scaling values to the unit interval (i.e. 0…1) - proved rather unsuccessful. See the summary stats below, with the transformed values remaining tightly grouped together around the median.
library(tidymodels)
## -- Attaching packages -------------------------------------- tidymodels 0.1.2 --
## v broom 0.7.2 v recipes 0.1.15
## v dials 0.0.9 v rsample 0.0.8
## v infer 0.5.3 v tune 0.1.2
## v modeldata 0.1.0 v workflows 0.2.1
## v parsnip 0.1.4 v yardstick 0.0.7
## -- Conflicts ----------------------------------------- tidymodels_conflicts() --
## x scales::discard() masks purrr::discard()
## x dplyr::filter() masks stats::filter()
## x recipes::fixed() masks stringr::fixed()
## x dplyr::lag() masks stats::lag()
## x yardstick::spec() masks readr::spec()
## x recipes::step() masks stats::step()
library(tidyverse)
library(skimr)
library(kableExtra)
##
## Attaching package: 'kableExtra'
## The following object is masked from 'package:dplyr':
##
## group_rows
green_space_urban_rural %>%
distinct(morphology_name, morphology_code)
# select variables of interest
cluster_variables <- green_space_urban_rural %>%
select(msoa_name,
green_space_area_per_capita,
pcnt_pop_area_with_go_space_access,
garden_area_per_capita)
unit_interval <- function(x){
(x - min(x)) / (max(x) - min(x))
}
# transform variable to try to remove skew
cluster_variables %>%
mutate(log_green_space_area_per_capita = log(green_space_area_per_capita + 1),
log_garden_area_per_capita = log(garden_area_per_capita + 1), # +1 require because log(0) is not defined
ui_log_green_space_area_per_capita = unit_interval(log_green_space_area_per_capita),
ui_log_garden_area_per_capita = unit_interval(log_garden_area_per_capita)) %>%
skim() %>%
as_tibble()
So, I wondered about focusing on a subset of the data which could be easier to work with. Perhaps given the focus on green space deprivation it makes sense to remove those clearly green space affluent MSOAs (e.g. those with 10,000’s m2 public green space per capita)
green_space_urban_rural %>%
ggplot(mapping = aes(x = green_space_area_per_capita,
y = morphology_name,
fill = morphology_name)) +
geom_boxplot()
green_space_urban_rural %>%
skim(green_space_area_per_capita) %>%
as_tibble() %>%
clean_names()
green_space_urban_rural %>%
ggplot(mapping = aes(x = green_space_area_per_capita,
y = morphology_name,
fill = morphology_name)) +
geom_boxplot() +
coord_cartesian(xlim = c(0, 1000))
green_space_urban_rural %>%
ggplot(mapping = aes(x = garden_area_per_capita,
y = morphology_name,
fill = morphology_name)) +
geom_boxplot() +
coord_cartesian(xlim = c(0, 1000))
green_space_urban_rural %>%
ggplot(mapping = aes(x = pcnt_pop_area_with_go_space_access,
y = morphology_name,
fill = morphology_name)) +
geom_boxplot()
green_space_urban_rural %>%
filter(green_space_area_per_capita < 10000) %>%
filter(garden_area_per_capita < 1000) %>%
ggplot(mapping = aes(x = log(green_space_area_per_capita),
y = log(garden_area_per_capita),
colour = morphology_name)) +
geom_point() +
facet_wrap(~morphology_name)
# check for na's
cluster_variables %>%
summarise(across(.cols = everything(),
.fns = ~sum(is.na(.))
)
)
# remove labeling columns ahead running clustering algorithm
data_points <- cluster_variables %>%
select(-msoa_name)
kclust <- kmeans(data_points, centers = 3)
# examine outputs
kclust
## K-means clustering with 3 clusters of sizes 4, 31, 6756
##
## Cluster means:
## green_space_area_per_capita pcnt_pop_area_with_go_space_access
## 1 71978.9107 32.43886
## 2 19425.8162 26.68028
## 3 114.2361 31.99417
## garden_area_per_capita
## 1 264.6099
## 2 271.8511
## 3 116.6490
##
## Clustering vector:
## [1] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [38] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [75] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [112] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [149] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [186] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [223] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [260] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [297] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [334] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [371] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [408] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [445] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [482] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [519] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [556] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [593] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [630] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [667] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [704] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [741] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [778] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [815] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [852] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [889] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [926] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [963] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [1000] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [1037] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [1074] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [1111] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [1148] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [1185] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [1222] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [1259] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [1296] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [1333] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [1370] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [1407] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [1444] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [1481] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [1518] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [1555] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [1592] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [1629] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [1666] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [1703] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [1740] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [1777] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [1814] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [1851] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [1888] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [1925] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [1962] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [1999] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [2036] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [2073] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [2110] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [2147] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [2184] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [2221] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [2258] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [2295] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [2332] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [2369] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [2406] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [2443] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [2480] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [2517] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [2554] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [2591] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [2628] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [2665] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [2702] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [2739] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [2776] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [2813] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [2850] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [2887] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [2924] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [2961] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [2998] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [3035] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [3072] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [3109] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [3146] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [3183] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [3220] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [3257] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [3294] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [3331] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [3368] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [3405] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [3442] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [3479] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [3516] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [3553] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [3590] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [3627] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [3664] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [3701] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [3738] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [3775] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [3812] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [3849] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 2 3 3 3 3 2 3 3 3 3 2 3 3 3
## [3886] 3 3 3 3 3 3 3 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 2 2 2 3 3 3 2 2 2 2 3
## [3923] 3 3 3 3 3 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [3960] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [3997] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [4034] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [4071] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [4108] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 2 3 3 2 3 3 3 3 3 3 3 3 3 3
## [4145] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [4182] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [4219] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 1 3 2 3 3 3 3 3 3 3
## [4256] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [4293] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [4330] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [4367] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [4404] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [4441] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [4478] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [4515] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [4552] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [4589] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [4626] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [4663] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [4700] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [4737] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [4774] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [4811] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [4848] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [4885] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [4922] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [4959] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [4996] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [5033] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [5070] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [5107] 3 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [5144] 3 3 3 3 3 3 3 3 3 3 3 3 3 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [5181] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [5218] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [5255] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [5292] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [5329] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [5366] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [5403] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [5440] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [5477] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [5514] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [5551] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 2 3 3 2 3 3 3 3 3 3
## [5588] 3 3 3 3 3 3 3 3 3 3 3 3 1 2 3 3 3 3 2 3 3 3 3 3 3 3 3 2 1 3 2 3 3 3 3 3 3
## [5625] 3 3 3 3 3 3 3 3 3 2 3 3 3 3 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 1 3 2
## [5662] 3 3 3 3 3 2 3 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [5699] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [5736] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [5773] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [5810] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [5847] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [5884] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [5921] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [5958] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 2 3 3 3 3 3 3 3 3 3 3 3 3 3
## [5995] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [6032] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [6069] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [6106] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [6143] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [6180] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [6217] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [6254] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [6291] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [6328] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [6365] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [6402] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [6439] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [6476] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [6513] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [6550] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [6587] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [6624] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [6661] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [6698] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [6735] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [6772] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
##
## Within cluster sum of squares by cluster:
## [1] 1566598682 1859441226 1868590580
## (between_SS / total_SS = 85.8 %)
##
## Available components:
##
## [1] "cluster" "centers" "totss" "withinss" "tot.withinss"
## [6] "betweenss" "size" "iter" "ifault"
summary(kclust)
## Length Class Mode
## cluster 6791 -none- numeric
## centers 9 -none- numeric
## totss 1 -none- numeric
## withinss 3 -none- numeric
## tot.withinss 1 -none- numeric
## betweenss 1 -none- numeric
## size 3 -none- numeric
## iter 1 -none- numeric
## ifault 1 -none- numeric
# add cluster identifier to the source data
augment(kclust, data_points)
# summarise per cluster
tidy(kclust)
glance(kclust)
# subset variables to include in clustering
data_points_subset <- data_points %>%
select(green_space_area_per_capita,
garden_area_per_capita)
data_points_subset %>%
summary()
## green_space_area_per_capita garden_area_per_capita
## Min. : 0.00 Min. : 0.257
## 1st Qu.: 8.19 1st Qu.: 56.878
## Median : 19.17 Median : 85.890
## Mean : 244.72 Mean : 117.445
## 3rd Qu.: 48.03 3rd Qu.: 138.730
## Max. :101465.57 Max. :5647.419
data_points_subset
# run k_means clustering for k = 1 ... 9
# and for each cluster create outputs
kclusts <- tibble(k = 1:9) %>%
mutate(k_clust = map(k, ~kmeans(data_points_subset, .x)),
tidied = map(k_clust, tidy),
glanced = map(k_clust, glance),
augmented = map(k_clust, augment, data_points_subset)
)
kclusts
# separate outputs for use in plotting
clusters <- kclusts %>%
unnest(cols = c(tidied))
clusters
assignments <- kclusts %>%
unnest(cols = c(augmented))
assignments
clusterings <- kclusts %>%
unnest(cols = c(glanced))
clusterings
p1 <- ggplot(data = assignments %>%
filter(garden_area_per_capita < 1200), # remove single outlier at approx 5000 m2
mapping = aes(x = green_space_area_per_capita,
y = garden_area_per_capita)
)
p1 + geom_point(mapping = aes(colour = .cluster)) +
facet_wrap(~ k)
gs_bar <- function(){
list(
geom_col(),
scale_fill_manual(values = colour_scale),
geom_text(mapping = aes(label = bar_label), hjust = 1.1, colour = "white", size = 3.5),
guides(fill = FALSE),
theme(axis.text.x = element_text(face = "bold"),
axis.text.y = element_text(face = "bold", size = 15),
plot.title = element_text(face = "bold", size = 15)
),
coord_flip()
)
}
bar_labels <- c("Large or very large gardens and large or \nvery large amounts of public green space",
"",
"",
"",
"Very small gardens and very small\namounts of public green space")
x_lab <- "Green space deprivation rating\n\n"
caption_lab <- "Source: Friends of the Earth, England's Green Space Gap"
p <- ggplot(data = msoa_and_pop_by_rating,
mapping = aes(x = green_space_deprivation_rating,
y = n_msoa,
fill = green_space_deprivation_rating)
)
p + gs_bar() +
labs(title = "Numbers of neighbourhoods in England by green space\ndeprivation rating\n",
x = x_lab,
y = "\nNumber of Neighbourhoods\n",
caption = caption_lab)
Below I plot the proportions of the 6791 MSOAs analyzed given each green space deprivation rating.
p <- ggplot(data = msoa_and_pop_by_rating,
mapping = aes(x = green_space_deprivation_rating,
y = prop_msoa,
fill = green_space_deprivation_rating)
)
p + gs_bar() +
labs(title = "Proportion of neighbourhoods in England by green space\ndeprivation rating\n",
x = x_lab,
y = "Proportion of neighbourhoods",
caption = caption_lab)
msoa_by_rating
p <- ggplot(data = msoa_and_pop_by_rating,
mapping = aes(x = green_space_deprivation_rating,
y = population,
fill = green_space_deprivation_rating)
)
p + gs_bar() +
labs(title = "The population living within neighbourhoods with each\ngreen space deprivation rating\n",
y = "\nPopulation",
x = x_lab)
The two plots below, respectively show for each region the numbers or proportion of MSOA with each green space deprivation rating. Key insights from the two plots include:
msoa_by_region_and_rating <- green_space %>%
# count by region and rating
group_by(region_name, green_space_deprivation_rating) %>%
summarise(n_msoa = n()) %>%
filter(!is.na(region_name)) %>%
ungroup() %>%
# calculate rating proportions by region
group_by(region_name) %>%
mutate(prop = n_msoa / sum(n_msoa)) %>%
ungroup()
msoa_by_region_and_rating
plot_msoas_by_region <- function(y_var){
p <- ggplot(data = msoa_by_region_and_rating,
mapping = aes(x = green_space_deprivation_rating, y = .data[[y_var]])
)
p + geom_bar(mapping = aes(fill = green_space_deprivation_rating),
stat = "identity",
colour = "grey80") +
scale_fill_brewer(type = "div",
palette = "RdYlGn",
direction = -1) +
guides(fill = FALSE) +
coord_flip() +
facet_wrap(~ region_name, ncol = 3)
}
plot_msoas_by_region("n_msoa")
plot_msoas_by_region("prop")
Below I plot again plot the proportion of MSOAs within each region receiving each green space deprivation rating. This time faceting the plot by green space deprivation rating rather than region. This makes it easier to compare across regions at a given rating.
It would be easier to read if I produced separate plots for each green space deprivation rating, as then the regions could be put in rank order.
prop_a <- msoa_by_region_and_rating %>%
filter(green_space_deprivation_rating == "A") %>%
mutate(prop_a = prop) %>%
select(region_name, prop_a)
msoa_by_region_and_rating <- msoa_by_region_and_rating %>%
left_join(prop_a)
msoa_by_region_and_rating
p <- ggplot(data = msoa_by_region_and_rating,
mapping = aes(x = reorder(region_name, prop_a), y = prop, fill = region_name)
)
p + geom_bar(stat = "identity") +
facet_wrap(~ green_space_deprivation_rating) +
guides(fill = FALSE) +
coord_flip()
Next I explored an alternative appraoch to considering the distribution of msoas by rating and region. I plotted the proportion of MSOAs that received a specific rating by region. In order to address the ordering issue above, I produced one plot for each green space rating. This meant addressing the challenge of how to ensure the colour associated with a given region was applied consistently across the five plots. Here is where I found the an approach to doing this, using scale_…_manual.
How to map a colour to a value of a categorical variable …
This approach help me identify some additional insights:
# The idea is to produce one bar chart for each green space rating, and across these plots use the same colour for a given region.
# choose a palette to work with
region_pal <- brewer.pal(9, "Paired")
# map the region names to a specific colour value (in a dataframe for ease)
region_colours_df <- msoa_by_region_and_rating %>%
select(region_name) %>%
distinct(region_name) %>%
add_column(colour = region_pal)
# convert the region-colour mapping from a dataframe to a vector
# because a vector is required by scale_fill_manual
region_colours_vector <- region_colours_df$colour
names(region_colours_vector) <- region_colours_df$region_name
region_colours_vector
plot_gsdr_prop_by_region <- function(rating = "A"){
msoa_by_region_and_rating %>%
# focus on the rating of interest
filter(green_space_deprivation_rating == rating) %>%
mutate(proportion = n_msoa / sum(n_msoa)) %>%
# create the bar chart
ggplot(mapping = aes(x = reorder(region_name, proportion),
y = proportion,
fill = region_name
)
) +
geom_col() +
# apply region-colour mapping
scale_fill_manual(values = region_colours_vector) +
guides(fill = FALSE) +
labs(title = str_c("MSOAs receiving green space deprivation rating: ", rating),
x = NULL,
y = str_c("Proportion of the MSOAs that recieved a rating of ", rating)
) +
coord_flip()
}
# produce one plot for each of the five green space deprivation ratings
gsdr_prop_by_region_plots <- c("A", "B", "C", "D", "E") %>%
map(~ plot_gsdr_prop_by_region(.x))
gsdr_prop_by_region_plots
d_plot <- gsdr_prop_by_region_plots[[4]] +
ylim(0, 0.45) # ensure the plot axis scales match
e_plot <- gsdr_prop_by_region_plots[[5]] +
ylim(0, 0.45) # ensure the plot axis scales match
plot_grid(d_plot, e_plot, ncol = 2)
Could do ridgeline plots for each region - for prop rated A and proportion rated E
https://datacarpentry.org/r-raster-vector-geospatial/06-vector-open-shapefile-in-r/
A quick visual inspection of the MSOAs colored by their green space deprivation rating, shows a similiar patter across the regions (with the exception of London). With the the D and E ratings (oranges and reds) occurring in smaller (presumably more densely populated MSOAs) which make up urban areas. While the larger, more rural MSOAs tend to be less green space deprived, and have A or B ratings. Given the whole region of London would probably be considered a continuous urban space, it is unsurprising to observe many green space deprived MSOAs across the region/plot, with relatively few less green space deprived areas present.
green_space_region_plot <- function(region){
p <- ggplot(data = msoa_sf_gs %>%
filter(region_name == region),
mapping = aes(fill = green_space_deprivation_rating)
)
p + geom_sf(colour = "grey75") +
scale_fill_brewer(type = "div",
palette = "RdYlGn",
direction = -1) +
labs(title = str_c("...", region))
}
region_names <- green_space %>%
select(region_name) %>%
distinct(region_name)
region_names %>%
filter(!is.na(region_name)) %>%
pmap(~ green_space_region_plot(region = .x))
green_space <- green_space %>%
mutate(prop_BAME_pop = BAME_Pop / Total_Pop_From_Ethnicity_Data) %>%
relocate(prop_BAME_pop, .after = BAME_Pop)
p <- ggplot(data = green_space,
mapping = aes(x = green_space_deprivation_rating,
y = prop_BAME_pop)
)
p + geom_jitter(alpha = 0.5, colour = "grey75") +
geom_boxplot(mapping = aes(fill = green_space_deprivation_rating),
alpha = 0.5,
size = 1.25,
colour = "grey25") +
scale_fill_brewer(type = "div",
palette = "RdYlGn",
direction = -1) +
guides(fill = FALSE) +
coord_flip()
p <- ggplot(data = green_space,
mapping = aes(y = green_space_deprivation_rating,
x = prop_BAME_pop,
fill = green_space_deprivation_rating)
)
p + geom_density_ridges(alpha = 0.6, scale = 1.5) +
scale_fill_brewer(type = "div",
palette = "RdYlGn",
direction = -1)
p <- ggplot(data = msoa_by_rating,
mapping = aes(x = green_space_deprivation_rating, y = n_msoa)
)
p + geom_segment(mapping = aes(x = green_space_deprivation_rating,
xend = green_space_deprivation_rating,
y = 750,
yend = n_msoa),
colour = "grey50",
size = 1.5
) +
geom_point(mapping = aes(fill = green_space_deprivation_rating),
size = 7.5,
shape = 21,
colour = "grey50",
stroke = 1.25
) +
scale_fill_brewer(type = "div",
palette = "RdYlGn",
direction = -1) +
labs(title = title_lab,
subtitle = subtitle_lab,
x = x_lab,
y = y_lab,
caption = caption_lab) +
guides(fill = FALSE) +
# facet_wrap(~ green_space_deprivation_rating) +
coord_flip()